Get single-point-mutation tables (spm_tbl)
library(tidyverse)
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
[37m── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[37m[32m✓[37m [34mggplot2[37m 3.3.0 [32m✓[37m [34mpurrr [37m 0.3.3
[32m✓[37m [34mtibble [37m 3.0.0 [32m✓[37m [34mdplyr [37m 0.8.5
[32m✓[37m [34mtidyr [37m 1.0.0 [32m✓[37m [34mstringr[37m 1.4.0
[32m✓[37m [34mreadr [37m 1.3.1 [32m✓[37m [34mforcats[37m 0.5.0[39m
package ‘tibble’ was built under R version 3.6.2[37m── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31mx[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
[31mx[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
library(ggridges)
Attaching package: ‘ggridges’
The following object is masked from ‘package:ggplot2’:
scale_discrete_manual
library(cowplot)
********************************************************
Note: As of version 1.0.0, cowplot does not change the
default ggplot2 theme anymore. To recover the previous
behavior, execute:
theme_set(theme_cowplot())
********************************************************
library(bio3d)
library(penm)
Attaching package: ‘penm’
The following object is masked from ‘package:stats’:
smooth
library(jefuns)
# file paths and names
spm_path <- "data"
pdb_path <- "data" # pdb files repaired using FoldX (by Elisha)
pdb_prefix <- "RepairPDB_"
run_path <- "output/sc_mut_scan" # directory for this run
# global parameters
beta <- beta_boltzmann()
nmut_per_site <- 19
update_enm <- FALSE # use true to recalculate entropies
add_frust <- FALSE # use true to add frustration terms to energy functions
# other parameters
run_par = lst(
nmut_per_site = nmut_per_site,
v0 = 0, # in kcal/mol
mut_sd_min = 1,
mut_dl_sigma = .3,
fit_dg_thr = 0,
fix_model = "moran",
fix_n_eff = 1,
fix_mut_rate = 1,
beta = beta
)
param <- do.call(set_param, run_par)
# save parameters in run's folder
file_param <- file.path(run_path, "param.rda")
save(beta, nmut_per_site, update_enm, add_frust, run_par, param, file = file_param)
#dataset <- tibble(pdb = "1PYL", chain = "A")
dataset <- tibble(pdb = "2ACY", chain = "A")
# read files
dataset <- dataset %>%
mutate(wt = map2(pdb,chain, pdb_file_check,
path = pdb_path, prefix = pdb_prefix))
# separate info
dataset <- dataset %>%
mutate(missing_residues = map_lgl(wt, "missing_residues"),
n_sites_pdb = map_int(wt, "n_sites"),
n_unique_resno= map_int(wt, "n_unique_resno"))
# get rid of wt column
dataset <- dataset %>% dplyr::select(-wt)
# get rid of cases for which there are repeated resno
dataset <- dataset %>%
dplyr::filter(n_sites_pdb == n_unique_resno)
# get rid of cases with "missing residues"
dataset <- dataset %>%
dplyr::filter(!missing_residues)
dataset
dataset <- dataset %>%
mutate(wt = map2(pdb,chain, read_pdb_sc,
path = pdb_path, prefix = pdb_prefix)) %>%
filter(!is.na(wt))
Loading required package: pracma
Attaching package: ‘pracma’
The following object is masked from ‘package:purrr’:
cross
dataset
Check that it works when pdb_active_site is defined
pdb_id <- dataset$pdb
chain <- dataset$chain
wt <- read_pdb_sc(pdb_id, chain, pdb_path, pdb_prefix)
dummy = sample(wt$pdb_site, 1)
dummy = c(dummy - 1, dummy, dummy + 1)
dummy
[1] 97 98 99
wt <- init_prot(wt, pdb_site_active = dummy, sd_min = param$mut$sd_min)
str(wt)
List of 10
$ xyz : num [1:294] 11.81 5.71 -1.69 8.01 9.17 ...
$ pdb_site : int [1:98] 1 2 3 4 5 6 7 8 9 10 ...
$ bfactor : num [1:98] 79.5 86.9 56.7 83 32.2 ...
$ nsites : int 98
$ site : int [1:98] 1 2 3 4 5 6 7 8 9 10 ...
$ pdb_site_active: num [1:3] 97 98 99
$ site_active : int [1:2] 97 98
$ ind_active : num [1:6] 289 290 291 292 293 294
$ enm :List of 9
..$ graph : tibble [807 × 8] (S3: tbl_df/tbl/data.frame)
.. ..$ edge: chr [1:807] "1-2" "1-3" "1-4" "1-5" ...
.. ..$ i : int [1:807] 1 1 1 1 1 1 1 1 1 1 ...
.. ..$ j : int [1:807] 2 3 4 5 6 7 55 56 58 59 ...
.. ..$ sdij: int [1:807] 1 2 3 4 5 6 54 55 57 58 ...
.. ..$ lij : num [1:807] 7.43 9.93 10.44 3.9 9.73 ...
.. ..$ kij : num [1:807] 189 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
.. ..$ dij : num [1:807] 7.43 9.93 10.44 3.9 9.73 ...
.. ..$ v0ij: num [1:807] 0 0 0 0 0 0 0 0 0 0 ...
..$ eij : num [1:807, 1:3] -0.511 -0.894 -0.921 -0.717 -0.455 ...
..$ kmat : num [1:294, 1:294] 69.4 -44.6 -73.2 -49.4 45 ...
..$ mode : num [1:288] 288 287 286 285 284 283 282 281 280 279 ...
..$ evalue : num [1:288] 613 607 605 596 583 ...
..$ cmat : num [1:294, 1:294] 0.07327 0.00449 0.02325 0.02716 -0.00107 ...
..$ umat : num [1:294, 1:288] 1.02e-05 -2.20e-06 -6.46e-05 -4.08e-06 1.04e-04 ...
..$ cmat_active: num [1:6, 1:6] 0.04746 0.0101 0.01315 0.00962 0.0013 ...
..$ kmat_active: num [1:6, 1:6] 37.8 26.9 -44.5 -14.9 -30.4 ...
$ energy :List of 5
..$ v_min : num 0
..$ dv_activation : num 0
..$ g_entropy : num 225
..$ g_entropy_activation: num 3.46
..$ v_stress : num 0
Check that it works when pdb_active_site is not defined
pdb_id <- dataset$pdb
chain <- dataset$chain
wt <- read_pdb_sc(pdb_id, chain, pdb_path, pdb_prefix)
wt <- init_prot(wt, sd_min = param$mut$sd_min)
str(wt)
List of 10
$ xyz : num [1:294] 11.81 5.71 -1.69 8.01 9.17 ...
$ pdb_site : int [1:98] 1 2 3 4 5 6 7 8 9 10 ...
$ bfactor : num [1:98] 79.5 86.9 56.7 83 32.2 ...
$ nsites : int 98
$ site : int [1:98] 1 2 3 4 5 6 7 8 9 10 ...
$ pdb_site_active: logi NA
$ site_active : logi NA
$ ind_active : logi NA
$ enm :List of 9
..$ graph : tibble [807 × 8] (S3: tbl_df/tbl/data.frame)
.. ..$ edge: chr [1:807] "1-2" "1-3" "1-4" "1-5" ...
.. ..$ i : int [1:807] 1 1 1 1 1 1 1 1 1 1 ...
.. ..$ j : int [1:807] 2 3 4 5 6 7 55 56 58 59 ...
.. ..$ sdij: int [1:807] 1 2 3 4 5 6 54 55 57 58 ...
.. ..$ lij : num [1:807] 7.43 9.93 10.44 3.9 9.73 ...
.. ..$ kij : num [1:807] 189 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
.. ..$ dij : num [1:807] 7.43 9.93 10.44 3.9 9.73 ...
.. ..$ v0ij: num [1:807] 0 0 0 0 0 0 0 0 0 0 ...
..$ eij : num [1:807, 1:3] -0.511 -0.894 -0.921 -0.717 -0.455 ...
..$ kmat : num [1:294, 1:294] 69.4 -44.6 -73.2 -49.4 45 ...
..$ mode : num [1:288] 288 287 286 285 284 283 282 281 280 279 ...
..$ evalue : num [1:288] 613 607 605 596 583 ...
..$ cmat : num [1:294, 1:294] 0.07327 0.00449 0.02325 0.02716 -0.00107 ...
..$ umat : num [1:294, 1:288] 1.02e-05 -2.20e-06 -6.46e-05 -4.08e-06 1.04e-04 ...
..$ cmat_active: logi NA
..$ kmat_active: logi NA
$ energy :List of 5
..$ v_min : num 0
..$ dv_activation : logi NA
..$ g_entropy : num 225
..$ g_entropy_activation: logi NA
..$ v_stress : num 0
enm_msf_plot(wt, param)
Error in enm_msf_plot(wt, param) : could not find function "enm_msf_plot"
enm_modes_matrix_plot(wt)
nsites = wt$nsites
nmodes <- max(wt$enm$mode)
enm_modes_plot(wt, modes = c(1, 2, 3, 4, nmodes - 3, nmodes - 2, nmodes - 1, nmodes))
Note that entropy terms do not change w.r.t. wild-type.
df_energies <- tibble(prot = "wt", recalc_enm = F, as_tibble(wt$energy))
mut <- get_mutant(wt, wt, wt, param)
df_row <- tibble(prot = "mut", recalc_enm = F, as_tibble(mut$mut$energy))
df_energies <- rbind(df_energies, df_row)
mut_site <- get_mutant_site(wt, site_mut = 50, wt, wt, param)
df_row <- tibble(prot = "mut_site", recalc_enm = F, as_tibble(mut_site$energy))
df_energies <- rbind(df_energies, df_row)
mut_site_2 <- get_mutant_site_2(wt, 50, mutation = 1)
df_row <- tibble(prot = "mut_site_2", recalc_enm = F, as_tibble(mut_site_2$energy))
df_energies <- rbind(df_energies, df_row)
df_energies
NA
\(f_{ij} = k_{ij} \delta l_{ij}\) is the force in the direction of contact \(i-j\). \(de = K^{1/2} dr = C^{-1/2}df\), \(dr = C df\).
Therefore, in normal-mode representation: \(de_{n} = \sigma_n df_{n}\) and \(dr_n = sigma_n^2 df_n\).
By analogy, in site representation, check if it’s approximately so that \(de^2_{i} = \sigma_n^2 df^2_{i}\) and \(dr^2_i = \sigma_n^4 df^2_i\).
Remember that on average \(df^2_n \propto \frac{1}{\sigma_n^2}\) and probably \(df^2_i \propto 1 / \sigma_i^2\).
mut <- get_mutant_site(wt, 50, wt, wt, param)
response_site_plot(wt, mut)
response_nm_plot(wt, mut)
# get mutant table
mutation <- seq(from = 0, to = 10)
j <- wt$site
prot_site <- function(prot) prot$site
prot_mode <- function(prot) prot$enm$mode
prot_evalue <- function(prot) prot$enm$evalue
df_mutants <- expand_grid(wt = list(wt), j, mutation) %>%
mutate(mut = pmap(list(wt, j, mutation), get_mutant_site_2)) %>%
mutate(i = map(wt, "site"),
dr2ij = map2(wt, mut, dr2_site),
msfi = map(wt, msf_prot),
mode = map(wt, prot_mode),
evalue = map(wt, prot_evalue),
dr2nj = map2(wt, mut, dr2_nm)) %>%
select(-wt, -mut)
df_j <- tibble(j = wt$site, msfj = msf_prot(wt))
df_mutants <- inner_join(df_j, df_mutants) %>%
select(mutation, j, i, msfj, msfi, everything())
Joining, by = "j"
df_mutants
Full mean response matrix:
df_prs_site <- df_mutants %>%
select(j, mutation, i, msfj, msfi, dr2ij) %>%
unnest(c(i, msfi, dr2ij))
# dr2ij_mean matrix
pij_mean <- df_prs_site %>%
group_by(i, j) %>%
summarise(dr2ij_mean = mean(dr2ij),
dr2ij_sd = sd(dr2ij)) %>%
ggplot(aes(j, i, fill = sqrt(dr2ij_mean))) +
geom_tile() +
scale_fill_viridis_c() +
theme_cowplot() +
NULL +
theme(legend.position = "none") +
coord_fixed() +
ggtitle("sqrt(dr2ij_mean) matrix")
pij_mean
#dr2ij asymmetry
df <- df_prs_site %>%
group_by(i, j) %>%
summarise(dr2ij_mean = mean(dr2ij))
dr2ij <- df$dr2ij_mean
dim(dr2ij) <- c(length(wt$site), length(wt$site))
str(dr2ij)
num [1:98, 1:98] 0.0202 0.00912 0.00268 0.00246 0.00645 ...
dr2ji <- t(dr2ij)
df$dr2ji <- as.vector(dr2ji)
df %>%
filter(!(i == j)) %>%
ggplot(aes(dr2ij_mean, dr2ji)) +
geom_point(alpha = .1) +
theme_cowplot() +
geom_smooth() +
scale_x_log10() +
scale_y_log10() +
ggtitle("dr2ij is not symmetric")
# dr2ij over single mutation scan, vs. average over several mutation scans
df <- df_prs_site %>%
group_by(i, j) %>%
summarise(dr2ij_mean = mean(dr2ij))
df <- inner_join(df, df_prs_site)
Joining, by = c("i", "j")
df %>%
filter(mutation > 0, mutation < 5) %>%
ggplot(aes(dr2ij_mean, dr2ij, color = factor(mutation))) +
geom_point(size = .2, alpha = .2) +
geom_smooth(method = "lm") +
facet_wrap(~mutation) +
scale_x_log10() +
scale_y_log10() +
theme_cowplot() +
panel_border() +
ggtitle("dr2ij single scan similar to average over scans")
# dr2ij_sd vs. dr2ij_mean plot
pij <- df_prs_site %>%
group_by(i, j) %>%
summarise(dr2ij_mean = mean(dr2ij),
dr2ij_sd = sd(dr2ij)) %>%
ggplot(aes(dr2ij_mean, dr2ij_sd)) +
geom_point() +
geom_smooth(method = "lm") +
geom_abline() +
theme_cowplot() +
theme(legend.position = "none") +
coord_fixed() +
NULL +
ggtitle("sd(dr2ij) proportional to mean(dr2ij)")
pij
NA
NA
NA
NA
NA
NA
Response profile vs. site:
# Average response of each site
pi_site <- df_prs_site %>%
group_by(i, msfi) %>%
summarise(dr2i_mean = mean(dr2ij)) %>%
ungroup() %>%
ggplot(aes(i, dr2i_mean)) +
geom_line() +
theme_cowplot() +
NULL
pi_msf <- df_prs_site %>%
group_by(i, msfi) %>%
summarise(dr2i_mean = mean(dr2ij)) %>%
ungroup() %>%
ggplot(aes(msfi, dr2i_mean)) +
geom_point() +
geom_smooth() +
theme_cowplot() +
NULL
pi_rmsf <- df_prs_site %>%
group_by(i, msfi) %>%
summarise(dr2i_mean = mean(dr2ij)) %>%
ungroup() %>%
ggplot(aes(1 / msfi, dr2i_mean)) +
geom_point() +
geom_smooth() +
theme_cowplot() +
NULL
plot_grid(pi_site, plot_grid(pi_msf, pi_rmsf), ncol = 1)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
Influence profile (dr2ij averaged over i):
# Average response of each site
pj_site <- df_prs_site %>%
group_by(j, msfj) %>%
summarise(dr2j_mean = mean(dr2ij)) %>%
ungroup() %>%
ggplot(aes(j, dr2j_mean)) +
geom_line() +
theme_cowplot() +
NULL
pj_msf <- df_prs_site %>%
group_by(j, msfj) %>%
summarise(dr2j_mean = mean(dr2ij)) %>%
ungroup() %>%
ggplot(aes(msfj, dr2j_mean)) +
geom_point() +
geom_smooth() +
theme_cowplot() +
NULL
pj_rmsf <- df_prs_site %>%
group_by(j, msfj) %>%
summarise(dr2j_mean = mean(dr2ij)) %>%
ungroup() %>%
ggplot(aes(1 / msfj, dr2j_mean)) +
geom_point() +
geom_smooth() +
theme_cowplot() +
NULL
plot_grid(pj_site, plot_grid(pj_msf, pj_rmsf), ncol = 1)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
df_prs_site <- df_mutants %>%
select(j, mutation, i, dr2i) %>%
rename(dr2 = dr2i) %>%
unnest(i, dr2)
pij = df_prs_site %>%
group_by(i, j) %>%
summarise(dr2_mean = mean(dr2),
dr2_sd = sd(dr2)) %>%
ggplot(aes(i, j, fill = sqrt(dr2_mean))) +
geom_tile() +
scale_color_viridis_c() +
theme_cowplot()
pi <- df_prs_site %>%
group_by(i) %>%
summarise(dr2_mean = mean(dr2),
dr2_sd = sd(dr2)) %>%
ggplot(aes(i, dr2_mean)) +
geom_line() +
theme_cowplot()
pj <- df_prs_site %>%
group_by(j) %>%
summarise(dr2_mean = mean(dr2),
dr2_sd = sd(dr2)) %>%
ggplot(aes(j, dr2_mean)) +
geom_line() +
theme_cowplot()
plot_grid(pij, plot_grid(pi, pj), ncol = 1)
Now entropy terms should change.
get_mut_energy <- function(prot, site, mutation, update_enm, add_frust) {
mut <- get_mutant_site_2(wt,
site,
mutation, wt, wt,
sd_min = param$mut$sd_min,
dl_sigma = param$mut$dl_sigma,
model = param$enm$model,
d_max = param$enm$d_max,
v0 = param$enm$v0,
add_frust = add_frust,
update_enm = update_enm)
as_tibble(mut$energy)
}
df <- tibble(prot = c("wt", "mutff", "mutft", "muttf", "muttt"),
site = rep(80, 5),
wt = lst(wt),
mutation = c(0, 1, 1, 1, 1),
update_enm = c(F, F, F, T, T),
add_frust = c(F, F, T, F, T)) %>%
mutate(mut_energy = pmap(list(wt, site, mutation, update_enm, add_frust), get_mut_energy)) %>%
unnest(mut_energy) %>%
arrange(mutation, add_frust, update_enm) %>%
select(-prot, -wt, -mutation)
df
get_mut <-function(wt, site, mutation = 1, dl_sigma = 1) {
get_mutant_site_2(wt,
site,
mutation, wt, wt,
sd_min = param$mut$sd_min,
dl_sigma = dl_sigma,
model = param$enm$model,
d_max = param$enm$d_max,
v0 = param$enm$v0,
add_frust = F,
update_enm = T)
}
mut <- get_mut(wt, 87)
msf_wt <- msf_prot(wt)
msf_mut <- msf_prot(mut)
msf_wt
[1] 0.19221090 0.22650772 0.27652762 0.23801551 0.15540035 0.11379356 0.08988389 0.08264491 0.08221297 0.09245153
[11] 0.07088199 0.10983968 0.07866147 0.12245703 0.21576473 0.16983013 0.08066221 0.16625006 0.25819479 0.10956434
[21] 0.16064563 0.08016656 0.10045700 0.16565278 0.13648865 0.07399745 0.11437135 0.11827279 0.09858470 0.06925928
[31] 0.13993470 0.16437541 0.10566655 0.17435580 0.06912545 0.08351386 0.08866065 0.10189597 0.07512330 0.11410072
[41] 0.10100282 0.13513072 0.40612314 0.30208413 0.20867748 0.11092580 0.07813797 0.11848193 0.09705542 0.09154759
[51] 0.07543998 0.08182771 0.09289899 0.10136397 0.10067371 0.13664689 0.13084000 0.06887339 0.11194963 0.13527639
[61] 0.08295352 0.09847737 0.13516187 0.08207295 0.07153236 0.17685035 0.21240002 0.15926191 0.08324076 0.10887274
[71] 0.24866472 0.24213216 0.11729905 0.26032454 0.10959876 0.24008642 0.19338515 0.11560144 0.15058885 0.11115986
[81] 0.30865100 0.27992135 0.15388853 0.24487682 0.16795407 0.11886896 0.47436236 0.57496157 0.13781438 0.16247234
[91] 0.13287431 0.23750172 0.21078605 0.07891333 0.10806600 0.11934879 0.13205554 0.24254646
df <- tibble(site = wt$site, wt_msf = msf_wt, mut_msf = msf_mut)
df %>%
mutate(min_msf = map2_dbl(wt_msf, mut_msf, min),
max_msf = map2_dbl(wt_msf, mut_msf, max)) %>%
mutate(min_msf = wt_msf,
max_msf = mut_msf) %>%
pivot_longer(cols = c("wt_msf", "mut_msf"),
names_to = "protein",
values_to = "msf") %>%
ggplot(aes(site, msf, color = protein)) +
geom_line() +
geom_ribbon(aes(ymin = min_msf, ymax = max_msf), alpha = 1, color = NA, fill = "pink") +
scale_color_viridis_d() +
theme_cowplot() +
NULL
df %>%
ggplot(aes(site, (msf_mut - msf_wt)/msf_wt)) +
geom_line() +
theme_cowplot()
NA
NA
get_mut_energy <- function(site, mutation, wt,...) {
# gets mutant, returns only its energy terms
mut <- get_mutant_site_2(wt, site, mutation,... )
mut$energy
}
# set up site-mutant table
spm_tbl <- spm_tbl %>%
unnest(site, pdb_site, cn, wcn, bfactor, bfactor_enm, d_active, .drop = TRUE) %>%
mutate(mutation = replicate(n(), c(0:nmut_per_site), simplify = FALSE)) %>%
unnest(mutation, .drop = TRUE)
# get mutants
spm_tbl <- spm_tbl %>%
mutate(
wt_energy = map(site, ~ wt_energy),
mut_energy = map2(site, mutation, get_mut_energy,
wt = wt,
sd_min = param$mut$sd_min,
update_enm = update_enm,
add_frust = add_frust))
# prepare for output
spm_tbl <- spm_tbl %>%
rename(wt = wt_energy, mut = mut_energy) %>%
mutate(wt = map(wt, as_tibble),
mut = map(mut, as_tibble)) %>%
unnest(wt, mut, .sep = "_")
# output
pdb <- spm_tbl$pdb[[1]]
chain <- spm_tbl$chain[[1]]
out_file <- file.path(run_path,paste0("spm_",pdb, "_", chain, ".csv.gz"))
write.csv(spm_tbl, file = gzfile(out_file),row.names = FALSE)